# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 06:29:09 2020

@author: HP
"""


import pandas as pd
import numpy as np
from scipy.stats import norm

'''
Summarize the results in rp_tests_by_participants.xlsx.
Rows are a MultiIndex with the category first. It takes the values: all, not edge, edge
(the labels used in the code are 'all', 'edge41' and 'edge0').
The second level of the MultiIndex is the CCEI cutoff. It takes the values 100, 99, 95, 90.
The columns are the tests.
'''


#=============================================================================
#INPUT FILE
#=============================================================================
# Source workbook (presumably one row per participant -- confirm against the
# file). Only the first sheet is read; its first column becomes the row index.
infile = 'rp_tests_by_participants.xlsx'
indf = pd.read_excel(infile, sheet_name=0, index_col=0)

#=============================================================================
#RESULT FILE
#=============================================================================

# Workbook that receives the summary table built in the next section.
results_file = 'rp_tests_summary.xlsx'

#=============================================================================
#MAKE TABLE
#=============================================================================

# Row labels (category x cutoff) and column labels of the summary table.
cats = ['all', 'edge41', 'edge0']
cat_varsin = ['edges', 'corners', 'cat']
escores = [100, 99, 95, 90]

my_index = pd.MultiIndex.from_product(
    [cats, escores], names=['category', 'escore'])
var_names = [
    'def', 'le', 'tn', 'imp', 'pat', 'cn', 'ca', 'imple', 'imple_ca',
    'imple_cs', 'imple_si', 'imple_sp', 'imple_ca_si',
    'imple_ca_sp', 'imple_cs_si', 'imple_cs_sp', 'imp2split',
    'imple_imp2split', 'PQR_risk', 'PQR_time', 'N',
]

outdf = pd.DataFrame(data=np.nan, index=my_index, columns=var_names)

# Label every row of the input: 'edge0' when `edges` lies in [0, 40]
# (both ends included), 'edge41' otherwise.
indf['cat'] = np.where(indf['edges'].between(0, 40), 'edge0', 'edge41')

for cat in cats:
    # 'all' keeps every row; any other label keeps only the matching rows.
    subset = indf if cat == 'all' else indf[indf['cat'] == cat]
    N = subset.shape[0]

    # Only the score columns take part in the pass rates.
    scores = subset.drop(labels=cat_varsin, axis=1)
    missing = scores.isna()

    for escore in escores:
        cutoff = escore / 100.
        # 1.0 where the score reaches the cutoff, 0.0 where it does not;
        # missing scores stay missing so that the mean skips them.
        passed = scores.ge(cutoff).astype(float).mask(missing)
        row = passed.mean()
        # Number of rows in this category, repeated on each of its rows.
        row['N'] = N
        outdf.loc[(cat, escore)] = row

outdf.to_excel(results_file)

#=============================================================================
#CORRELATIONS CALC
#=============================================================================

# Correlation tables over all rows of indf: Pearson first, then Spearman.
#
# Columns left out of the correlation tables. 'cat' is the string label
# column added to indf earlier in this script; it has to be dropped
# explicitly because DataFrame.corr() in pandas >= 2.0 raises on non-numeric
# columns instead of silently skipping them as older versions did.
corr_excluded = ['tn', 'pat', 'imple_cs', 'imple_cs_si', 'imple_cs_sp',
                 'edges', 'corners', 'cat']

# (correlation method, output workbook); 'pearson' is the DataFrame.corr()
# default, so the first table matches a plain .corr() call.
corr_outputs = [('pearson', 'rp_tests_corr.xlsx'),
                ('spearman', 'rp_tests_corr_spearmn.xlsx')]

for corr_method, corr_file in corr_outputs:
    # results_file is rebound alongside corr_file, as the script always did.
    results_file = corr_file
    tempdf = indf.drop(corr_excluded, axis=1)
    corr_table = tempdf.corr(method=corr_method)
    corr_table.to_excel(corr_file)
    
#=============================================================================
#COMPARISONS FILE
#=============================================================================
    
comparisons_file = 'rp_tests_comparisons.xlsx'

# Pairs of tests whose scores are compared row by row.
comparisons_list = [['le', 'tn'], ['imp', 'pat'], ['imple_ca', 'imple_cs'],
                    ['imple_si', 'imple_sp'], ['imple_ca_si', 'imple_ca_sp'],
                    ['imple_cs_si', 'imple_cs_sp']]


def _comparison_label(higher, lower):
    """Return the column label for 'higher scores strictly above lower'."""
    return higher + ' > ' + lower


# Two columns per pair, one for each direction of the strict inequality.
var_names = []
for first, second in comparisons_list:
    var_names.append(_comparison_label(first, second))
    var_names.append(_comparison_label(second, first))

# One row per category; each cell is the share of rows where the inequality
# named by the column holds.
comp_df = pd.DataFrame(data=np.nan, index=cats, columns=var_names)

for cat in cats:
    # 'all' keeps every row; any other label keeps only the matching rows.
    tempdf = indf if cat == 'all' else indf[indf['cat'] == cat]

    for first, second in comparisons_list:
        var0 = tempdf[first]
        var1 = tempdf[second]
        # Both directions share one denominator: the non-missing count of the
        # first test of the pair.
        n_valid = var0.count()
        if n_valid == 0:
            # No usable rows in this category: leave the shares undefined
            # rather than dividing by zero.
            first_share = second_share = np.nan
        else:
            first_share = (var0 > var1).sum() / n_valid
            second_share = (var1 > var0).sum() / n_valid
        comp_df.loc[cat, _comparison_label(first, second)] = first_share
        comp_df.loc[cat, _comparison_label(second, first)] = second_share

comp_df.to_excel(comparisons_file)

#=============================================================================
#McNemar Tests
#=============================================================================

mcnemar_file = 'rp_tests_mcnemar.xlsx'

# Pairs of tests to compare; each pair yields one column "<first> to <second>".
comparisons_list = [['le', 'tn'], ['imp', 'pat'], ['imple_ca', 'imple_cs'],
                    ['imple_si', 'imple_sp'], ['imple_ca_si', 'imple_ca_sp'],
                    ['imple_cs_si', 'imple_cs_sp']]
var_names = [var[0] + ' to ' + var[1] for var in comparisons_list]

cats = ['all', 'edge41', 'edge0']
escores = [100, 99, 95, 90]
my_index = pd.MultiIndex.from_product(
    [cats, escores], names=['category', 'escore'])

# One row per (category, cutoff); each cell holds a one-sided p-value.
mcnemar_df = pd.DataFrame(data=np.nan, index=my_index, columns=var_names)

for cat in cats:
    # The category subset does not depend on the cutoff, so build it once.
    tempdf = indf if cat == 'all' else indf[indf['cat'] == cat]

    for escore in escores:
        cutoff = escore / 100.

        for var1, var2 in comparisons_list:
            # A test is passed when its score reaches the cutoff; a missing
            # score compares as False and therefore counts as not passed.
            var1pass = tempdf[var1] >= cutoff
            var2pass = tempdf[var2] >= cutoff

            # Discordant pairs: exactly one of the two tests is passed.
            n1 = (var1pass & ~var2pass).sum()
            n2 = (~var1pass & var2pass).sum()
            n_discordant = n1 + n2

            if n_discordant == 0:
                # The statistic is 0/0 without discordant pairs; report NaN
                # explicitly instead of relying on a division warning.
                result = np.nan
            else:
                # Normal approximation, no continuity correction. The upper
                # tail is taken, so a small value means var1 is passed more
                # often than var2. sf() is the survival function 1 - cdf(),
                # evaluated without the cancellation error of the subtraction.
                normal_arg = (n1 - n2) / n_discordant ** 0.5
                result = norm.sf(normal_arg)

            mcnemar_df.loc[(cat, escore), var1 + ' to ' + var2] = result

mcnemar_df.to_excel(mcnemar_file)






